In [1]:
    
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import math
import scipy.io
from scipy.special import expit
from math import *
from scipy import optimize
from sklearn import svm
    
In [2]:
    
sns.set_style('whitegrid')
%matplotlib inline
    
In [158]:
    
sigma = 2
# Check the model (returns the coefficients for a verified model)
def checkModel(model):
    # Initializing the coefficients
    coef = None
    # Checking model existence
    if model is not None:
        if model.kernel == 'linear':
            if len(model.coef_.shape) >= 2:
                coef = model.coef_[0]
            else:
                coef = model.coef_
        else:
            # Non-linear kernels expose no usable coef_, so leave it as None
            coef = None
    else:
        # Model has some problem
        return { "status": False, "msg": 'Model has a problem', "coef": None }
    
    return { "status": True, "msg": 'Model is correct', "coef": coef }
# Plotting the linear decision boundary of a trained SVM
def visualizeBoundry(X, y, model= None):
    # Checking model existence
    checkedModel = checkModel(model)
    
    if checkedModel["status"] and checkedModel["coef"] is not None:
        coef = checkedModel["coef"]
        
        # Setting the range of x and solving coef[0] + coef[1]*x1 + coef[2]*x2 + b = 0 for x2
        # (column 0 of X is the bias column, so coef[0] multiplies a constant 1)
        X_range = np.linspace(min( X.T[1] ), max( X.T[1] ), 100 )
        y_range = -1 * ( coef[0] + coef[1] * X_range + model.intercept_ ) / coef[2]
        # Converting X_range and y_range to a DataFrame
        df = pd.DataFrame( [X_range, y_range] ).T
        df.columns = ['first', 'second']
        # Plotting the boundary
        plt.plot('first', 'second', data= df)
        
    else:
        return checkedModel["msg"]
    
def plotData(X, y):
    pos = ( y == 1 )
    neg = ( y == 0 )
    
    plt.scatter(X[pos].T[0], X[pos].T[1], c='k', marker='+')
    plt.scatter(X[neg].T[0], X[neg].T[1], c='y', marker='o')
    
# Plotting the non-linear decision boundary of a trained SVM as a filled contour
def visualizeBoundryCountor(X, y, model= None):
    # Checking model existence
    checkedModel = checkModel(model)
    
    if checkedModel["status"]:
        # Setting the range of x and y
        X_range = np.linspace(min( X.T[0] ), max( X.T[0] ), 400)
        y_range = np.linspace(min( X.T[1] ), max( X.T[1] ), 400)
        
        # Predicting the class of every point on the grid
        X_meshed, y_meshed = np.meshgrid(X_range, y_range)
        z = model.predict(np.c_[ X_meshed.ravel(), y_meshed.ravel() ])
        z = z.reshape( X_meshed.shape )
        
        plt.figure(figsize=(12,8))
        plt.contourf(X_meshed, y_meshed, z, alpha= 0.2)
        plotData(X, y)
        
        plt.show()
    else:
        return checkedModel["msg"]
    
# Gaussian (RBF) kernel: exp( -||x1 - x2||^2 / (2 * sigma^2) )
def gaussianKernel(x1, x2):
    dist = ( x1 - x2 ).T.dot(x1 - x2)
    return np.exp( -dist / (2 * ( sigma ** 2 )) )
def findBestModel(X, y, Xval, yval):
    # Initializing the possible values for both C and sigma
    pValues = np.array([0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30])
    
    # Creating a matrix to hold the error of each model
    error = np.zeros((len(pValues) ** 2, 1))
    
    # Computing the model error for each combination of C and sigma
    for i in range(len(pValues)):
        for j in range(len(pValues)):
            # Initializing the model; an RBF kernel with gamma = 1 / (2 * sigma^2)
            # matches the gaussianKernel defined above
            model = svm.SVC(C=pValues[i], kernel= 'rbf', gamma= 1 / (2 * ( pValues[j] ** 2 )))
            
            # Fitting the data to the model
            model.fit(X, y)
            
            # Computing the error of the model on the cross-validation dataset
            error[ i * len(pValues) + j ] = 1 - model.score(Xval, yval)
    
    # Getting the index of the minimum value in the error matrix
    idx = np.argmin(error)
    
    # Recovering C and sigma for the model with the minimum error
    i = idx // len(pValues)
    j = idx % len(pValues)
    
    C = pValues[i]
    sigma = pValues[j]
    
    return { "C": C,
            "sigma": sigma }
    
In [4]:
    
mat = scipy.io.loadmat('ex6data1.mat')
X = mat['X']
y = mat['y'].T[0]    # Transpose so that y becomes a 1-D array
    
In [5]:
    
X_bias = np.insert(X,0,1,axis=1)
    
In [6]:
    
X_df = pd.DataFrame(X)
y_df = pd.DataFrame(y)
    
In [7]:
    
df = pd.concat([X_df, y_df],axis=1)
df.columns = ['first', 'second', 'out']
    
In [8]:
    
sns.lmplot(x="first",y= "second",data=df, hue="out", fit_reg=False, size= 8, scatter_kws={'s':80})
    
    Out[8]:
    
In [9]:
    
clf = svm.SVC(C=100, kernel="linear", tol=1e-3)
clf.fit(X_bias, y)
    
    Out[9]:
In [10]:
    
sns.lmplot(x='first', y='second', data=df, hue='out', fit_reg=False, size=10, scatter_kws={'s': 80})
visualizeBoundry(X_bias, y, clf)
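    
For comparison (an added, illustrative cell): the exercise also suggests trying a smaller regularization parameter. With C = 1 the linear boundary typically leaves the single outlier misclassified instead of bending towards it as it does with C = 100.

In [ ]:
    
# Illustrative comparison (added): refit with a smaller C and plot the resulting boundary
clf_soft = svm.SVC(C=1, kernel="linear", tol=1e-3)
clf_soft.fit(X_bias, y)
sns.lmplot(x='first', y='second', data=df, hue='out', fit_reg=False, size=10, scatter_kws={'s': 80})
visualizeBoundry(X_bias, y, clf_soft)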
    
    
In [80]:
    
mat = scipy.io.loadmat("ex6data2.mat")
X = mat['X']
y = mat['y'].T[0]
    
In [81]:
    
X_bias = np.insert(X,0,1,axis=1)
    
In [82]:
    
X_df = pd.DataFrame(X)
y_df = pd.DataFrame(y)
    
In [83]:
    
df = pd.concat([X_df, y_df],axis=1)
df.columns = ['first', 'second', 'out']
    
In [84]:
    
sns.lmplot("first", "second", data=df, hue="out", fit_reg=False, size= 8, scatter_kws={'s':80})
    
    Out[84]:
    
In [85]:
    
sigma = 0.1    # a small sigma suits this dataset (the value used in the original exercise)
gamma = 1 / (2 * sigma ** 2)    # RBF gamma equivalent to the Gaussian kernel defined above
clf = svm.SVC(C=10, kernel='rbf', gamma=gamma)
clf.fit(X, y)
    
    Out[85]:
In [86]:
    
visualizeBoundryCountor(X, y, clf)
    
    
In [90]:
    
clf.score(X, y)
    
    Out[90]:
In [167]:
    
mat = scipy.io.loadmat('ex6data3.mat')
X = mat["X"]
Xval = mat["Xval"]
y = mat["y"].T[0]
yval = mat["yval"].T[0]
    
In [168]:
    
X_bias = np.insert(X,0,1,axis=1)
Xval_bias = np.insert(Xval,0,1,axis=1)
    
In [169]:
    
X_df = pd.DataFrame(X)
Xval_df = pd.DataFrame(Xval)
y_df = pd.DataFrame(y)
yval_df = pd.DataFrame(yval)
    
In [170]:
    
df = pd.concat([X_df, y_df],axis=1)
df.columns = ['first', 'second', 'out']
    
In [171]:
    
dfval = pd.concat([Xval_df, yval_df],axis=1)
dfval.columns = ['first', 'second', 'out']
    
In [172]:
    
sns.lmplot("first", "second", data=df, hue="out", fit_reg=False, size= 8, scatter_kws={'s':80})
    
    Out[172]:
    
In [173]:
    
best = findBestModel(X, y, Xval, yval)
best
    
    Out[173]:
In [187]:
    
clf = svm.SVC(C=best["C"], kernel='rbf', gamma= 1 / (2 * ( best["sigma"] ** 2 )))
clf.fit(X, y)
    
    Out[187]:
In [188]:
    
visualizeBoundryCountor(X,y, clf)
    
    
In [189]:
    
clf.score(Xval,yval)
    
    Out[189]:
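    
As an optional final check (an added cell), the fitted boundary can also be drawn over the cross-validation points by reusing the contour helper defined earlier:

In [ ]:
    
# Optional (added): visualize the fitted boundary against the cross-validation set
visualizeBoundryCountor(Xval, yval, clf)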